#' Pooled two-proportion z-test computed from success/failure counts.
#'
#' Fixes relative to the earlier version: the standard error now uses both
#' sample sizes (it previously used the first sample size twice), and the
#' tail probability is computed with `lower.tail = FALSE` so that very small
#' p-values are not rounded to exactly 0. Output recorded in `##` comments
#' elsewhere in this file was produced before these corrections.
#'
#' @param s1,s2 Number of successes in sample 1 and sample 2.
#' @param f1,f2 Number of failures in sample 1 and sample 2.
#' @return An unnamed numeric vector of length 4: the pooled success
#'   proportion, the sample-1 proportion, the sample-2 proportion, and the
#'   one-sided upper-tail p-value `P(Z > |z_obs|)`. Double the last value
#'   for a two-sided test.
z_test_hypothesis <- function(s1, s2, f1, f2) {
  n1 <- s1 + f1
  n2 <- s2 + f2

  p_total <- (s1 + s2) / (n1 + n2)
  p1_hat <- s1 / n1
  p2_hat <- s2 / n2

  # The pooled estimate (n1 * p1_hat + n2 * p2_hat) / (n1 + n2) is
  # algebraically the same quantity as p_total, so it is reused here.
  std_err <- sqrt(p_total * (1 - p_total) * (1 / n1 + 1 / n2))
  z_obs <- (p1_hat - p2_hat) / std_err

  # Upper tail computed directly: 1 - pnorm(x) returns 0 once pnorm(x)
  # rounds to 1.
  p_value <- pnorm(abs(z_obs), lower.tail = FALSE)

  c(p_total, p1_hat, p2_hat, p_value)
}
#' Rebuild the `Date` column as a `Date` from day-month text.
#'
#' The day is taken from the first two characters of `Date` and the month
#' from the last two; any "-" caught in either piece is removed, so both
#' one- and two-digit days and months parse (e.g. "01-04" and "1-4").
#'
#' @param tibble A data frame with a text `Date` column holding day and
#'   month (presumably "dd-mm" as read from the CSV; confirm against the
#'   source files).
#' @param year Year to attach to every row. Defaults to 2020, the value
#'   that was previously hard-coded.
#' @return `tibble` with `Date` replaced by a `Date` vector; the temporary
#'   `day` and `month` columns are dropped.
extract_day_month <- function(tibble, year = 2020) {
  tibble %>%
    mutate(
      day = gsub("-", "", substr(Date, 1, 2)),
      month = gsub(
        "-", "",
        substr(Date, str_length(Date) - 1, str_length(Date))
      ),
      # `.env$year` forces the function argument, not a same-named column.
      Date = as.Date(
        paste(.env$year, month, day, sep = "-"),
        format = "%Y-%m-%d"
      )
    ) %>%
    select(-day, -month)
}

bank_A <- read_csv("data/Bank_A.csv")
# Parse the dates, then turn the three sparse count columns into numbers.
# Every "-" in those columns is replaced by "0" before conversion
# (presumably "-" marks "no occurrences" in the CSV; confirm with the data
# owner).
bank_A <- bank_A %>%
  extract_day_month() %>%
  mutate(
    across(
      c(TheDangOTrangThaiKhongGiaoDichDuoc, SaiOTP, TheDaLienKet),
      ~ as.numeric(gsub("-", "0", .x))
    )
  )
glimpse(bank_A)
## Rows: 60
## Columns: 11
## $ Date <date> 2020-04-01, 2020-04-02, 2020-04-03~
## $ TheChuaBatEcom <dbl> 511, 375, 463, 418, 473, 520, 561, ~
## $ GiaoDichThanhCong <dbl> 317, 220, 266, 256, 295, 215, 314, ~
## $ SaiSoDienThoai <dbl> 71, 57, 92, 69, 62, 99, 120, 73, 72~
## $ SaiTenChuThe <dbl> 62, 52, 66, 40, 61, 69, 60, 48, 54,~
## $ SaiSoCMND <dbl> 15, 12, 22, 15, 21, 34, 40, 31, 26,~
## $ SaiNgayPhatHanhThe <dbl> 14, 10, 13, 18, 35, 24, 27, 15, 8, ~
## $ KhongNhanDuocXacThucOTP <dbl> 24, 9, 17, 9, 11, 124, 33, 15, 11, ~
## $ TheDangOTrangThaiKhongGiaoDichDuoc <dbl> 8, 4, 3, 1, 3, 5, 13, 2, 1, 7, 7, 7~
## $ SaiOTP <dbl> 3, 3, 3, 1, 6, 22, 7, 2, 4, 2, 2, 1~
## $ TheDaLienKet <dbl> 0, 0, 0, 3, 4, 0, 0, 0, 0, 0, 1, 1,~
# Load bank B, parse the dates, then turn the sparse count columns into
# numbers. Every "-" in those columns is replaced by "0" before conversion
# (presumably "-" marks "no occurrences" in the CSV; confirm with the data
# owner).
bank_B <- read_csv("data/Bank_B.csv") %>%
  extract_day_month() %>%
  mutate(
    across(
      c(
        SaiThongTinKhachHang,
        TaiKhoanNganHangBiKhoa,
        HeThongDangCoLoi,
        TheKhongHoTro,
        OTPHetHan,
        TheDaLienKet
      ),
      ~ as.numeric(gsub("-", "0", .x))
    )
  )
glimpse(bank_B)
## Rows: 60
## Columns: 13
## $ Date <date> 2020-04-01, 2020-04-02, 2020-04-03, 2020-04-~
## $ GiaoDichThanhCong <dbl> 205, 168, 186, 184, 205, 199, 229, 202, 189, ~
## $ KhongNhanDuocXacThucOTP <dbl> 42, 37, 59, 41, 40, 94, 48, 53, 64, 72, 96, 6~
## $ HeThongNganHangDangCoLoi <dbl> 94, 40, 48, 55, 39, 64, 42, 40, 52, 80, 102, ~
## $ KhongTimThayThongTinThe <dbl> 42, 31, 34, 54, 50, 41, 53, 63, 34, 73, 55, 6~
## $ TheChuaBatEcom <dbl> 54, 22, 30, 34, 37, 45, 34, 24, 22, 60, 44, 4~
## $ SaiOTP <dbl> 9, 7, 15, 15, 13, 21, 12, 13, 26, 30, 15, 14,~
## $ SaiThongTinKhachHang <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 14, 15, 1~
## $ TaiKhoanNganHangBiKhoa <dbl> 5, 2, 7, 2, 5, 1, 2, 5, 2, 2, 7, 5, 5, 1, 10,~
## $ HeThongDangCoLoi <dbl> 14, 9, 6, 7, 11, 19, 11, 21, 9, 12, 11, 6, 0,~
## $ TheKhongHoTro <dbl> 6, 1, 0, 3, 0, 2, 1, 1, 2, 1, 1, 6, 0, 5, 1, ~
## $ OTPHetHan <dbl> 1, 0, 0, 2, 2, 3, 3, 1, 3, 5, 4, 3, 2, 7, 3, ~
## $ TheDaLienKet <dbl> 0, 0, 0, 0, 5, 0, 7, 0, 2, 4, 0, 5, 5, 1, 3, ~
# Load bank C and parse the dates. Two columns get every "-" replaced by
# "0" (presumably a "no occurrences" marker; confirm with the data owner);
# those two plus KhongNhanDuocPhanHoiTuNganHang are then converted to
# numeric.
# NOTE(review): KhongNhanDuocPhanHoiTuNganHang is converted without the
# "-" replacement, so a "-" in that column would become NA.
bank_C <- read_csv("data/Bank_C.csv") %>%
  extract_day_month() %>%
  mutate(
    across(
      c(GiaoDichDangXuLy, ThongTinXacThucKhongHopLe),
      ~ gsub("-", "0", .x)
    )
  ) %>%
  mutate(
    across(
      c(
        KhongNhanDuocPhanHoiTuNganHang,
        GiaoDichDangXuLy,
        ThongTinXacThucKhongHopLe
      ),
      as.numeric
    )
  )
glimpse(bank_C)
## Rows: 60
## Columns: 7
## $ Date <date> 2020-04-01, 2020-04-02, 2020-04-03, 20~
## $ KhongNhanDuocPhanHoiTuNganHang <dbl> 972, 683, 721, 850, 821, 780, 897, 685,~
## $ GiaoDichThanhCong <dbl> 450, 296, 408, 364, 386, 200, 396, 312,~
## $ LienKetThatBai <dbl> 174, 119, 128, 147, 136, 84, 146, 110, ~
## $ TheDaLienKet <dbl> 26, 18, 28, 43, 30, 14, 41, 20, 23, 58,~
## $ GiaoDichDangXuLy <dbl> 7, 3, 0, 5, 7, 1, 3, 1, 4, 4, 3, 4, 2, ~
## $ ThongTinXacThucKhongHopLe <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 18, 1, 2, 2,~
# Load bank D, parse the dates, then turn the sparse count columns into
# numbers. Every "-" in those columns is replaced by "0" before conversion
# (presumably "-" marks "no occurrences" in the CSV; confirm with the data
# owner).
bank_D <- read_csv("data/Bank_D.csv") %>%
  extract_day_month() %>%
  mutate(
    across(
      c(
        KhachHangHuyGiaoDich,
        ThongTinDangNhapKhongDung,
        HeThongNganHangDangCoLoi,
        TheDaLienKet,
        HeThongNganHangDangCoLoi2,
        OTPHetHan
      ),
      ~ as.numeric(gsub("-", "0", .x))
    )
  )
glimpse(bank_D)
## Rows: 60
## Columns: 9
## $ Date <date> 2020-04-01, 2020-04-02, 2020-04-03, 20~
## $ KhongNhanDuocPhanHoiTuNganHang <dbl> 594, 496, 532, 513, 497, 544, 467, 482,~
## $ GiaoDichThanhCong <dbl> 135, 113, 123, 107, 109, 111, 87, 77, 7~
## $ KhachHangHuyGiaoDich <dbl> 3, 2, 2, 3, 0, 3, 3, 0, 1, 4, 1, 135, 2~
## $ ThongTinDangNhapKhongDung <dbl> 18, 23, 16, 19, 18, 15, 13, 12, 15, 22,~
## $ HeThongNganHangDangCoLoi <dbl> 0, 0, 0, 0, 1, 1, 64, 2, 0, 1, 0, 3, 0,~
## $ TheDaLienKet <dbl> 0, 0, 0, 2, 0, 1, 1, 0, 0, 1, 6, 0, 6, ~
## $ HeThongNganHangDangCoLoi2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ~
## $ OTPHetHan <dbl> 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 3,~
# Tim hieu ve tinh mua vu (exploring seasonality) ----
# cols_low_value <- c("SaiOTP", "TheDaLienKet", "TheDangOTrangThaiKhongGiaoDichDuoc")
# Reshape each bank's wide daily table into long form: one row per
# (Date, column name), with the column's count in `value`.
time_series_bank_A <- bank_A %>%
  pivot_longer(!Date, names_to = "name", values_to = "value")

time_series_bank_B <- bank_B %>%
  pivot_longer(!Date, names_to = "name", values_to = "value")

time_series_bank_C <- bank_C %>%
  pivot_longer(!Date, names_to = "name", values_to = "value")

time_series_bank_D <- bank_D %>%
  pivot_longer(!Date, names_to = "name", values_to = "value")

#' Floor the `Date` column to the start of the given period.
#'
#' @param data A data frame with a `Date` column.
#' @param unit Period passed to `floor_date()`, e.g. "day", "week", "month".
#' @return `data` with `Date` floored; rows are not aggregated.
convert_date_ts <- function(data, unit = "day") {
  data %>%
    mutate(Date = floor_date(Date, unit = unit))
}

# Helper: time-series plot of every metric in a long table, with dates
# floored to `unit` first.
plot_metric_series <- function(data, unit = "day") {
  data %>%
    convert_date_ts(unit = unit) %>%
    group_by(name) %>%
    plot_time_series(
      .date_var = Date,
      .value = value,
      .facet_ncol = 2,
      .smooth_color = "#18BC9C",
      .smooth_size = 0.5
    )
}

# Helper: anomaly diagnostics of every metric in a long table.
# NOTE(review): the title still says "Dow Jones" although the data are bank
# transaction counts; it looks like a copy-paste leftover. Confirm the
# intended title before changing it.
plot_metric_anomalies <- function(data) {
  data %>%
    group_by(name) %>%
    plot_anomaly_diagnostics(
      # Spelled out in full; the previous `.date` relied on partial
      # argument matching.
      .date_var = Date,
      .value = value,
      .facet_ncol = 2,
      .interactive = FALSE,
      .title = "Anomaly Diagnostics Dow Jones",
      .anom_color = "#FB3029",
      .max_anomalies = 0.07,
      .alpha = 0.05
    )
}

# Helper: seasonal diagnostics of a long table.
# NOTE(review): the table is not grouped by `name` here, so all metrics
# are pooled into one series; confirm this is intended.
plot_metric_seasonality <- function(data) {
  data %>%
    plot_seasonal_diagnostics(
      .date_var = Date,
      .value = value,
      .interactive = FALSE,
      .geom_color = "#4E79A7"
    )
}

# Daily series, one plot per bank.
plot_metric_series(time_series_bank_A)
plot_metric_series(time_series_bank_B)
plot_metric_series(time_series_bank_C)
plot_metric_series(time_series_bank_D)

# Bank A with dates floored to week and to month.
# NOTE(review): flooring does not aggregate, so several daily values share
# one date in these two plots; confirm whether a sum per period was meant.
plot_metric_series(time_series_bank_A, unit = "week")
plot_metric_series(time_series_bank_A, unit = "month")

# Anomaly diagnostics, one plot per bank.
plot_metric_anomalies(time_series_bank_A)
plot_metric_anomalies(time_series_bank_B)
plot_metric_anomalies(time_series_bank_C)
plot_metric_anomalies(time_series_bank_D)

# Seasonal diagnostics, one plot per bank.
plot_metric_seasonality(time_series_bank_A)
plot_metric_seasonality(time_series_bank_B)
plot_metric_seasonality(time_series_bank_C)
plot_metric_seasonality(time_series_bank_D)

# Daily failure total for bank A: the sum of every column except the
# success count (GiaoDichThanhCong).
number_of_failures_bank_A <- bank_A %>%
  select(-GiaoDichThanhCong) %>%
  pivot_longer(!Date, names_to = "name", values_to = "value") %>%
  group_by(Date) %>%
  summarise(
    ThatBai = sum(value, na.rm = TRUE)
  )
# Daily success rate for bank A (successes / (successes + failures)),
# drawn as a line with a linear trend.
bank_A %>%
  inner_join(number_of_failures_bank_A, by = "Date") %>%
  group_by(Date) %>%
  summarise(
    success_rate = GiaoDichThanhCong / (GiaoDichThanhCong + ThatBai)
  ) %>%
  ggplot(mapping = aes(x = Date, y = success_rate)) +
  geom_line() +
  geom_smooth(method = "lm")

# Daily failure total for bank B: the sum of every column except the
# success count (GiaoDichThanhCong). This assignment was previously fused
# onto the end of the plot statement above, which does not parse.
number_of_failures_bank_B <- bank_B %>%
  select(-GiaoDichThanhCong) %>%
  pivot_longer(!Date, names_to = "name", values_to = "value") %>%
  group_by(Date) %>%
  summarise(
    ThatBai = sum(value, na.rm = TRUE)
  )
# Daily success rate for bank B (successes / (successes + failures)),
# drawn as a line with a linear trend.
bank_B %>%
  inner_join(number_of_failures_bank_B, by = "Date") %>%
  group_by(Date) %>%
  summarise(
    success_rate = GiaoDichThanhCong / (GiaoDichThanhCong + ThatBai)
  ) %>%
  ggplot(mapping = aes(x = Date, y = success_rate)) +
  geom_line() +
  geom_smooth(method = "lm")

# Daily failure total for bank C: the sum of every column except the
# success count (GiaoDichThanhCong). This assignment was previously fused
# onto the end of the plot statement above, which does not parse.
number_of_failures_bank_C <- bank_C %>%
  select(-GiaoDichThanhCong) %>%
  pivot_longer(!Date, names_to = "name", values_to = "value") %>%
  group_by(Date) %>%
  summarise(
    ThatBai = sum(value, na.rm = TRUE)
  )
# Daily success rate for bank C (successes / (successes + failures)),
# drawn as a line with a linear trend.
bank_C %>%
  inner_join(number_of_failures_bank_C, by = "Date") %>%
  group_by(Date) %>%
  summarise(
    success_rate = GiaoDichThanhCong / (GiaoDichThanhCong + ThatBai)
  ) %>%
  ggplot(mapping = aes(x = Date, y = success_rate)) +
  geom_line() +
  geom_smooth(method = "lm")

# Daily failure total for bank D: the sum of every column except the
# success count (GiaoDichThanhCong). This assignment was previously fused
# onto the end of the plot statement above, which does not parse.
number_of_failures_bank_D <- bank_D %>%
  select(-GiaoDichThanhCong) %>%
  pivot_longer(!Date, names_to = "name", values_to = "value") %>%
  group_by(Date) %>%
  summarise(
    ThatBai = sum(value, na.rm = TRUE)
  )
# Daily success rate for bank D (successes / (successes + failures)),
# drawn as a line with a linear trend.
bank_D %>%
  inner_join(number_of_failures_bank_D, by = "Date") %>%
  group_by(Date) %>%
  summarise(
    success_rate = GiaoDichThanhCong / (GiaoDichThanhCong + ThatBai)
  ) %>%
  ggplot(mapping = aes(x = Date, y = success_rate)) +
  geom_line() +
  geom_smooth(method = "lm")

# lag_bank_A <- bank_A %>%
# mutate(
# lag_TheChuaBatEcom = lag(TheChuaBatEcom),
# lag_GiaoDichThanhCong = lag(GiaoDichThanhCong),
# lag_SaiSoDienThoai = lag(SaiSoDienThoai),
# lag_SaiTenChuThe = lag(SaiTenChuThe),
# lag_SaiSoCMND = lag(SaiSoCMND),
# lag_SaiNgayPhatHanhThe = lag(SaiNgayPhatHanhThe),
# lag_KhongNhanDuocXacThucOTP = lag(KhongNhanDuocXacThucOTP),
# lag_TheDangOTrangThaiKhongGiaoDichDuoc = lag(TheDangOTrangThaiKhongGiaoDichDuoc),
# lag_SaiOTP = lag(SaiOTP),
# lag_TheDaLienKet = lag(TheDaLienKet)
# )# delta_bank_A <- bank_A %>%
# mutate(
# delta_TheChuaBatEcom = TheChuaBatEcom - lag(TheChuaBatEcom),
# delta_GiaoDichThanhCong = GiaoDichThanhCong - lag(GiaoDichThanhCong),
# delta_SaiSoDienThoai = SaiSoDienThoai - lag(SaiSoDienThoai),
# delta_SaiTenChuThe = SaiTenChuThe - lag(SaiTenChuThe),
# delta_SaiSoCMND = SaiSoCMND - lag(SaiSoCMND),
# delta_SaiNgayPhatHanhThe = SaiNgayPhatHanhThe - lag(SaiNgayPhatHanhThe),
# delta_KhongNhanDuocXacThucOTP = KhongNhanDuocXacThucOTP - lag(KhongNhanDuocXacThucOTP),
# delta_TheDangOTrangThaiKhongGiaoDichDuoc = TheDangOTrangThaiKhongGiaoDichDuoc- lag(TheDangOTrangThaiKhongGiaoDichDuoc),
# delta_SaiOTP = SaiOTP - lag(SaiOTP),
# delta_TheDaLienKet = SaiOTP - lag(TheDaLienKet)
# )

# Monthly totals of successes and failures for bank A, with the resulting
# success rate. The pipeline head `bank_A %>%` was previously fused into
# the comment line above, leaving the statement without its input data.
bank_A %>%
  inner_join(number_of_failures_bank_A, by = "Date") %>%
  mutate(month = month(Date)) %>%
  group_by(month) %>%
  summarise(
    total_success = sum(GiaoDichThanhCong),
    total_failure = sum(ThatBai),
    success_rate = total_success / (total_success + total_failure)
  )
## # A tibble: 2 x 4
## month total_success total_failure success_rate
## <dbl> <dbl> <dbl> <dbl>
## 1 4 8401 22279 0.274
## 2 5 9322 20945 0.308
#' Pooled two-proportion z-test computed from success/failure counts.
#'
#' Fixes relative to the earlier version: the standard error now uses both
#' sample sizes (it previously used the first sample size twice), and the
#' tail probability is computed with `lower.tail = FALSE` so that very small
#' p-values are not rounded to exactly 0. Output recorded in `##` comments
#' elsewhere in this file was produced before these corrections.
#'
#' @param s1,s2 Number of successes in sample 1 and sample 2.
#' @param f1,f2 Number of failures in sample 1 and sample 2.
#' @return An unnamed numeric vector of length 4: the pooled success
#'   proportion, the sample-1 proportion, the sample-2 proportion, and the
#'   one-sided upper-tail p-value `P(Z > |z_obs|)`. Double the last value
#'   for a two-sided test.
z_test_hypothesis <- function(s1, s2, f1, f2) {
  n1 <- s1 + f1
  n2 <- s2 + f2

  p_total <- (s1 + s2) / (n1 + n2)
  p1_hat <- s1 / n1
  p2_hat <- s2 / n2

  # The pooled estimate (n1 * p1_hat + n2 * p2_hat) / (n1 + n2) is
  # algebraically the same quantity as p_total, so it is reused here.
  std_err <- sqrt(p_total * (1 - p_total) * (1 / n1 + 1 / n2))
  z_obs <- (p1_hat - p2_hat) / std_err

  # Upper tail computed directly: 1 - pnorm(x) returns 0 once pnorm(x)
  # rounds to 1.
  p_value <- pnorm(abs(z_obs), lower.tail = FALSE)

  c(p_total, p1_hat, p2_hat, p_value)
}
# Bank A: month 4 (sample 1) against month 5 (sample 2), using the monthly
# success and failure totals from the summary table above.
z_test_hypothesis(s1 = 8401, s2 = 9322, f1 = 22279, f2 = 20945)
## [1] 0.2907936 0.2738266 0.3079922 0.0000000
# Monthly totals of successes and failures for bank B, with the resulting
# success rate.
bank_B %>%
  inner_join(number_of_failures_bank_B, by = "Date") %>%
  group_by(month = month(Date)) %>%
  summarise(
    total_success = sum(GiaoDichThanhCong),
    total_failure = sum(ThatBai),
    success_rate = total_success / (total_success + total_failure)
  )
## # A tibble: 2 x 4
## month total_success total_failure success_rate
## <dbl> <dbl> <dbl> <dbl>
## 1 4 7020 7125 0.496
## 2 5 7236 6385 0.531

# Bank B: month 4 (sample 1) against month 5 (sample 2).
z_test_hypothesis(s1 = 7020, s2 = 7236, f1 = 7125, f2 = 6385)
## [1] 5.134337e-01 4.962884e-01 5.312385e-01 2.043925e-09
# Monthly totals of successes and failures for bank C, with the resulting
# success rate.
bank_C %>%
  inner_join(number_of_failures_bank_C, by = "Date") %>%
  group_by(month = month(Date)) %>%
  summarise(
    total_success = sum(GiaoDichThanhCong),
    total_failure = sum(ThatBai),
    success_rate = total_success / (total_success + total_failure)
  )
## # A tibble: 2 x 4
## month total_success total_failure success_rate
## <dbl> <dbl> <dbl> <dbl>
## 1 4 11229 30458 0.269
## 2 5 9268 27753 0.250

# Bank C: month 4 (sample 1) against month 5 (sample 2).
z_test_hypothesis(s1 = 11229, s2 = 9268, f1 = 30458, f2 = 27753)
## [1] 2.604183e-01 2.693646e-01 2.503444e-01 1.961483e-10
# Monthly totals of successes and failures for bank D, with the resulting
# success rate.
bank_D %>%
  inner_join(number_of_failures_bank_D, by = "Date") %>%
  group_by(month = month(Date)) %>%
  summarise(
    total_success = sum(GiaoDichThanhCong),
    total_failure = sum(ThatBai),
    success_rate = total_success / (total_success + total_failure)
  )
## # A tibble: 2 x 4
## month total_success total_failure success_rate
## <dbl> <dbl> <dbl> <dbl>
## 1 4 3607 20921 0.147
## 2 5 4952 19088 0.206

# Bank D: month 4 (sample 1) against month 5 (sample 2).
z_test_hypothesis(s1 = 3607, s2 = 4952, f1 = 20921, f2 = 19088)
## [1] 0.1762271 0.1470564 0.2059900 0.0000000
# Whole-period total of every numeric column of bank A (NAs ignored).
bank_A %>%
  summarise(
    across(where(is.numeric), function(col) sum(col, na.rm = TRUE))
  )
## # A tibble: 1 x 10
## TheChuaBatEcom GiaoDichThanhCong SaiSoDienThoai SaiTenChuThe SaiSoCMND
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 28646 17723 5874 3105 1848
## # ... with 5 more variables: SaiNgayPhatHanhThe <dbl>,
## # KhongNhanDuocXacThucOTP <dbl>, TheDangOTrangThaiKhongGiaoDichDuoc <dbl>,
## # SaiOTP <dbl>, TheDaLienKet <dbl>
# Whole-period total of every numeric column of bank B (NAs ignored).
bank_B %>%
  summarise(
    across(where(is.numeric), function(col) sum(col, na.rm = TRUE))
  )
## # A tibble: 1 x 12
## GiaoDichThanhCong KhongNhanDuocXacThucOTP HeThongNganHangDan~ KhongTimThayTho~
## <dbl> <dbl> <dbl> <dbl>
## 1 14256 3503 2968 2358
## # ... with 8 more variables: TheChuaBatEcom <dbl>, SaiOTP <dbl>,
## # SaiThongTinKhachHang <dbl>, TaiKhoanNganHangBiKhoa <dbl>,
## # HeThongDangCoLoi <dbl>, TheKhongHoTro <dbl>, OTPHetHan <dbl>,
## # TheDaLienKet <dbl>
# Whole-period total of every numeric column of bank C (NAs ignored).
bank_C %>%
  summarise(
    across(where(is.numeric), function(col) sum(col, na.rm = TRUE))
  )
## # A tibble: 1 x 6
## KhongNhanDuocPh~ GiaoDichThanhCo~ LienKetThatBai TheDaLienKet GiaoDichDangXuLy
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 47422 20497 8604 1938 166
## # ... with 1 more variable: ThongTinXacThucKhongHopLe <dbl>
# Whole-period total of every numeric column of bank D (NAs ignored).
bank_D %>%
  summarise(
    across(where(is.numeric), function(col) sum(col, na.rm = TRUE))
  )
## # A tibble: 1 x 8
## KhongNhanDuocPhanHoiTuNgan~ GiaoDichThanhCo~ KhachHangHuyGia~ ThongTinDangNha~
## <dbl> <dbl> <dbl> <dbl>
## 1 29990 8559 8066 1467
## # ... with 4 more variables: HeThongNganHangDangCoLoi <dbl>,
## # TheDaLienKet <dbl>, HeThongNganHangDangCoLoi2 <dbl>, OTPHetHan <dbl>
# bank_A %>%
# pivot_longer(!Date, names_to = "name", values_to = "value") %>%
# select(name) %>%
# unique() %>%
# inner_join(bank_B %>%
# pivot_longer(!Date, names_to = "name", values_to = "value") %>%
# select(name) %>%
# unique(), by = "name")
#
# bank_C %>%
# pivot_longer(!Date, names_to = "name", values_to = "value") %>%
# select(name) %>%
# unique() %>%
# inner_join(bank_B %>%
# pivot_longer(!Date, names_to = "name", values_to = "value") %>%
# select(name) %>%
# unique(), by = "name")